import pandas as pd
import jieba as jb


class dataProcess:
    """Static helpers for loading comment data and tokenizing it with jieba."""

    # Data loading
    @staticmethod
    def dataLoadToList(file_path):
        """Read an Excel file and return its comment and tag columns.

        Args:
            file_path: Path of the Excel file, passed to ``pd.read_excel``.

        Returns:
            A ``(commentData, commentDataTag)`` tuple holding the
            ``"comment_content"`` and ``"comment_tag"`` columns.

        Raises:
            KeyError: If either column is missing from the sheet.
        """
        data = pd.read_excel(file_path)
        return data["comment_content"], data["comment_tag"]

    # Stop-word loading shared by both tokenizing entry points
    @staticmethod
    def _load_stop_words(stop_path):
        """Return the stop words in ``stop_path`` (one per line) as a set."""
        # ``with`` closes the file deterministically. Only the line terminator
        # is removed, so a final line without a trailing newline keeps its
        # last character (slicing with [:-1] would cut it off).
        with open(stop_path, 'r', encoding='UTF-8') as stop_file:
            return {line.rstrip("\n") for line in stop_file}

    # Tokenize one sentence against an already loaded stop-word set
    @staticmethod
    def _cut_and_filter(sentence, stop_words):
        """Segment ``sentence`` and drop blank tokens and stop words."""
        # Tokens are stripped first because the segmenter may emit
        # whitespace-only or whitespace-padded pieces.
        tokens = (word.strip() for word in jb.lcut(sentence.strip()))
        return [word for word in tokens if word and word not in stop_words]

    # Document tokenization
    @staticmethod
    def participle(fileStream, stop_path):
        """Tokenize every sentence of an iterable and remove stop words.

        Args:
            fileStream: Iterable of sentence strings. Each element must
                support ``.strip()``; non-string values raise AttributeError.
            stop_path: Path of a UTF-8 text file with one stop word per line.

        Returns:
            A list with one token list per input sentence, in input order.
        """
        # The stop-word file is read once, before any sentence is processed.
        stop_words = dataProcess._load_stop_words(stop_path)
        return [
            dataProcess._cut_and_filter(sentence, stop_words)
            for sentence in fileStream
        ]

    # Single-sentence tokenization
    @staticmethod
    def sentenceCut(sentence, stop_path):
        """Tokenize one sentence and remove stop words.

        Args:
            sentence: The sentence string to segment.
            stop_path: Path of a UTF-8 text file with one stop word per line.

        Returns:
            The list of remaining tokens, in order of appearance.
        """
        stop_words = dataProcess._load_stop_words(stop_path)
        return dataProcess._cut_and_filter(sentence, stop_words)
